1
2
3
4
5
6 package ch.twiddlefinger.inet.rewinder.model.parser.conversion;
7
8 import java.io.CharConversionException;
9 import java.io.IOException;
10 import java.io.OutputStream;
11 import java.io.Writer;
12
13
14 /***
 * <p> This class represents a UTF-8 stream writer.</p>
16 *
17 * <p> This writer supports surrogate <code>char</code> pairs (representing
18 * characters in the range [U+10000 .. U+10FFFF]). It can also be used
19 * to write characters from their unicodes (31 bits) directly
20 * (ref. {@link #write(int)}).</p>
21 *
22 * <p> Instances of this class can be reused for different output streams
23 * and can be part of a higher level component (e.g. serializer) in order
24 * to avoid dynamic buffer allocation when the destination output changes.
 * Also, wrapping it in a <code>java.io.BufferedWriter</code> is unnecessary,
 * as instances of this class embed their own data buffers.</p>
27
28 * <p> Note: This writer is unsynchronized and always produces well-formed
29 * UTF-8 sequences.</p>
30 *
31 * <p><i> This class is <b>public domain</b> (not copyrighted).</i></p>
32 *
33 * @author <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
34 * @version 4.6, July 14, 2003
35 * @see Utf8StreamReader
36 */
public final class Utf8StreamWriter extends Writer {
    /**
     * Size in bytes of the internal buffer created by the default constructor.
     */
    private static final int DEFAULT_BUFFER_SIZE = 2048;

    /**
     * Holds the current output stream or <code>null</code> if closed
     * (or not yet set).
     */
    private OutputStream _outStream;

    /**
     * Holds the encoded bytes that have not been handed to the stream yet.
     */
    private final byte[] _bytes;

    /**
     * Holds the number of pending bytes in {@link #_bytes}.
     */
    private int _index;

    /**
     * Holds the high surrogate waiting for its low surrogate, or
     * <code>0</code> when no high surrogate is pending.
     */
    private char _highSurrogate;

    /**
     * Default constructor (2048 bytes buffer).
     */
    public Utf8StreamWriter() {
        this(DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a {@link Utf8StreamWriter} of specified buffer size.
     *
     * @param bufferSize the buffer size in bytes (at least 1).
     * @throws IllegalArgumentException if <code>bufferSize</code> is less
     *         than 1 (such a buffer could not hold a single encoded byte).
     */
    public Utf8StreamWriter(int bufferSize) {
        if (bufferSize < 1) {
            throw new IllegalArgumentException(
                "Buffer size must be at least 1: " + bufferSize);
        }

        _bytes = new byte[bufferSize];
    }

    /**
     * Sets the output stream to use for writing until this writer is closed.
     * For example:<pre>
     *     Writer writer = new Utf8StreamWriter().setOutputStream(outStream);
     * </pre> can be used in place of <pre>
     *     Writer writer = new java.io.OutputStreamWriter(outStream, "UTF-8");
     * </pre>
     *
     * @param outStream the output stream.
     * @return this UTF-8 writer.
     * @see #close
     */
    public Utf8StreamWriter setOutputStream(OutputStream outStream) {
        _outStream = outStream;

        return this;
    }

    /**
     * Writes a single character. This method supports 16-bits
     * character surrogates: a high surrogate is only remembered and nothing
     * is emitted until a low surrogate arrives, at which point the
     * combined character is encoded.
     *
     * @param c <code>char</code> the character to be written (possibly
     *        a surrogate).
     * @throws CharConversionException if <code>c</code> is a low surrogate
     *         and no high surrogate is pending.
     * @throws IOException if an I/O error occurs.
     */
    public void write(char c) throws IOException {
        if ((c < 0xd800) || (c > 0xdfff)) {
            encode(c);
        } else if (c < 0xdc00) {
            _highSurrogate = c;
        } else {
            if (_highSurrogate == 0) {
                throw new CharConversionException(
                    "Unpaired low surrogate U+" + Integer.toHexString(c));
            }

            final int code = ((_highSurrogate - 0xd800) << 10) +
                (c - 0xdc00) + 0x10000;

            // Consume the high surrogate so that it cannot be paired again
            // with a later, unrelated low surrogate.
            _highSurrogate = 0;
            encode(code);
        }
    }

    /**
     * Writes a character given its 31-bits Unicode. All 31 bits of the
     * argument are significant (the value is not truncated to 16 bits).
     * Values in the surrogate range [0xD800 .. 0xDFFF] are handled exactly
     * like {@link #write(char)} so that surrogate pairs written one
     * <code>char</code> at a time through the {@link Writer} API
     * (e.g. <code>append(char)</code>) are combined instead of being
     * encoded individually.
     *
     * @param code the 31 bits Unicode of the character to be written.
     * @throws CharConversionException if <code>code</code> is negative or
     *         is a low surrogate with no pending high surrogate.
     * @throws IOException if an I/O error occurs.
     */
    public void write(int code) throws IOException {
        if ((code & 0xfffff800) == 0xd800) {
            write((char) code);
        } else {
            encode(code);
        }
    }

    /**
     * Encodes the specified (non-surrogate) code into the buffer using
     * 1 to 6 bytes depending on its magnitude.
     *
     * @param code the code to encode.
     * @throws CharConversionException if <code>code</code> is negative.
     * @throws IOException if an I/O error occurs while flushing.
     */
    private void encode(int code) throws IOException {
        if ((code & 0xffffff80) == 0) { // Up to 7 bits: 1 byte.
            put(code);
        } else if ((code & 0xfffff800) == 0) { // Up to 11 bits: 2 bytes.
            put(0xc0 | (code >> 6));
            putTrail(code);
        } else if ((code & 0xffff0000) == 0) { // Up to 16 bits: 3 bytes.
            put(0xe0 | (code >> 12));
            putTrail(code >> 6);
            putTrail(code);
        } else if ((code & 0xffe00000) == 0) { // Up to 21 bits: 4 bytes.
            put(0xf0 | (code >> 18));
            putTrail(code >> 12);
            putTrail(code >> 6);
            putTrail(code);
        } else if ((code & 0xfc000000) == 0) { // Up to 26 bits: 5 bytes.
            put(0xf8 | (code >> 24));
            putTrail(code >> 18);
            putTrail(code >> 12);
            putTrail(code >> 6);
            putTrail(code);
        } else if ((code & 0x80000000) == 0) { // Up to 31 bits: 6 bytes.
            put(0xfc | (code >> 30));
            putTrail(code >> 24);
            putTrail(code >> 18);
            putTrail(code >> 12);
            putTrail(code >> 6);
            putTrail(code);
        } else {
            throw new CharConversionException("Illegal character U+" +
                Integer.toHexString(code));
        }
    }

    /**
     * Appends a continuation byte (<code>10xxxxxx</code>) holding the six
     * low-order bits of the specified value.
     *
     * @param bits the value whose six low-order bits are stored.
     * @throws IOException if an I/O error occurs while flushing.
     */
    private void putTrail(int bits) throws IOException {
        put(0x80 | (bits & 0x3f));
    }

    /**
     * Appends one byte to the buffer, flushing the buffer to the output
     * stream as soon as it becomes full.
     *
     * @param b the byte to append (only the eight low-order bits are kept).
     * @throws IOException if an I/O error occurs while flushing.
     */
    private void put(int b) throws IOException {
        _bytes[_index] = (byte) b;

        if (++_index >= _bytes.length) {
            flushBuffer();
        }
    }

    /**
     * Writes a portion of an array of characters.
     *
     * @param cbuf the array of characters.
     * @param off the offset from which to start writing characters.
     * @param len the number of characters to write.
     * @throws IOException if an I/O error occurs.
     */
    public void write(char[] cbuf, int off, int len) throws IOException {
        final int end = off + len;

        for (int i = off; i < end; i++) {
            write(cbuf[i]);
        }
    }

    /**
     * Writes a portion of a string.
     *
     * @param str a String.
     * @param off the offset from which to start writing characters.
     * @param len the number of characters to write.
     * @throws IOException if an I/O error occurs
     */
    public void write(String str, int off, int len) throws IOException {
        final int end = off + len;

        for (int i = off; i < end; i++) {
            write(str.charAt(i));
        }
    }

    /**
     * Flushes the stream: the bytes held in the internal buffer are written
     * to the output stream, then the output stream itself is flushed.
     *
     * @throws IOException if an I/O error occurs or this writer is closed.
     */
    public void flush() throws IOException {
        flushBuffer();
        _outStream.flush();
    }

    /**
     * Closes the stream, flushing it first. Closing a previously-closed
     * stream has no effect. The underlying output stream is closed and this
     * writer is reset (buffer emptied, pending surrogate forgotten) even
     * when the final flush fails, so the stream is not leaked and the
     * instance can be reused with another output stream.
     *
     * @exception IOException If an I/O error occurs; when both the flush
     *            and the close fail, the flush failure is the one reported.
     */
    public void close() throws IOException {
        final OutputStream out = _outStream;

        if (out == null) {
            return;
        }

        IOException flushFailure = null;

        try {
            flushBuffer();
        } catch (IOException e) {
            flushFailure = e;
        } finally {
            _outStream = null;
            _index = 0;
            _highSurrogate = 0;
        }

        try {
            out.close();
        } catch (IOException closeFailure) {
            // The earlier flush failure (if any) is the root cause and
            // is reported in preference to the close failure.
            if (flushFailure == null) {
                throw closeFailure;
            }
        }

        if (flushFailure != null) {
            throw flushFailure;
        }
    }

    /**
     * Writes the content of the internal bytes buffer to the output stream
     * and empties the buffer.
     *
     * @throws IOException if an I/O error occurs or no output stream is set.
     */
    private void flushBuffer() throws IOException {
        if (_outStream == null) {
            throw new IOException("Stream closed");
        }

        _outStream.write(_bytes, 0, _index);
        _index = 0;
    }
}